<?php
require "../db.inc.php";
// Table that receives one row per doctor profile (read by get_id() and update()).
$tb_name = "one.sys_doctor";

// The index page is cached locally under this name; profile pages are cached
// under the relative path taken from the index links.
$file = "fullIndex.cfm";
$base = "http://www.castleconnolly.com/doctors/";
$url = $base . $file;

// Single-profile mode: when the first CLI argument names an existing file in
// profiles/, process only that file and stop.
if ($argc > 1 && is_file("profiles/{$argv[1]}")) {
	$file = "profiles/{$argv[1]}";
	process($file);
	die();
}

// Download the index only when no local copy exists.
if (!is_file($file)) {
	// Both arguments are escaped so shell metacharacters cannot alter the command.
	$cmd = "wget -O " . escapeshellarg($file) . " " . escapeshellarg($url);
	system($cmd);
}

$arr = file($file);
if ($arr === false) {
	// Without the index there is nothing to iterate over.
	echo "Unable to read index file $file\n";
	exit(1);
}

foreach ($arr as $line) {
	if (!preg_match("/<a href=\"(.*)\">(.*)<\/a><br>/", $line, $matches)) {
		continue;
	}

	$file = $matches[1];

	// Only links into profiles/ are of interest. The link text comes from a
	// remote page, so also refuse anything that tries to climb out of that
	// directory.
	if (substr($file, 0, 9) != "profiles/" || strpos($file, "..") !== false) {
		continue;
	}

	$url = $base . $file;

	// NOTE: a zero-byte file left by a failed download is not re-fetched.
	if (!is_file($file)) {
		echo "$url\n";
		// $file and $url are derived from scraped HTML: always escape them.
		$cmd = "wget -O " . escapeshellarg($file) . " " . escapeshellarg($url);
		system($cmd);
	}

	process($file);
}

/**
 * Imports one downloaded profile page into the database.
 *
 * The doctor code is whatever follows the first 9 characters of $file
 * (the length of "profiles/"). The row id for that code is looked up or
 * created, the page is parsed, and the extracted fields are written back.
 *
 * @param string $file Path of the profile page, expected to start with "profiles/".
 * @return void
 */
function process($file) {
	$code = substr($file, 9);
	$id = get_id($code);

	// get_id() hands back a falsy value when the row could neither be found
	// nor inserted; an UPDATE without a usable id would be malformed or hit
	// the wrong row.
	if (!$id) {
		return;
	}

	// file() returns false when the page is missing or unreadable (e.g. the
	// download failed); there is nothing to parse in that case.
	$arr = is_file($file) ? file($file) : false;
	if ($arr === false) {
		return;
	}

	$data = get_data($arr, $id);
	update($data, $id);
}

/**
 * Returns the row id stored for a doctor code, inserting a new row when the
 * lookup yields nothing.
 *
 * @param string $code Doctor code; passed through addslashes() before use in SQL.
 * @return mixed Integer id of an existing row, the wrapper's insert_id for a
 *               freshly inserted row, or the (falsy) lookup result when the
 *               insert did not succeed.
 */
function get_id($code) {
	global $db, $tb_name;

	$code = addslashes($code);

	// Existing row: normalise whatever the wrapper returned to an integer.
	$found = $db->getOne("SELECT id FROM $tb_name WHERE code='$code'");
	if ($found != false) {
		return intval($found);
	}

	// No row yet: create one and report the id assigned to it.
	if ($db->query("INSERT INTO $tb_name (code) VALUES ('$code')")) {
		return $db->insert_id;
	}

	// Insert failed: fall back to the falsy lookup result.
	return $found;
}

/**
 * Writes the extracted fields into the row with the given id.
 *
 * Every entry of $data becomes a `column="value"` assignment (value passed
 * through addslashes()). The generated statement is echoed before it is run.
 *
 * @param array $data Column name => value pairs.
 * @param mixed $id   Row id, interpolated as-is into the WHERE clause.
 * @return mixed false when $data yields no assignments, otherwise whatever
 *               $db->query() returns.
 */
function update($data, $id) {
	global $db, $tb_name;

	$pairs = array();
	foreach ($data as $column => $text) {
		$pairs[] = $column . "=\"" . addslashes($text) . "\"";
	}
	$sets = implode(",", $pairs);

	// Nothing to write: do not touch the database.
	if (!$sets) {
		return false;
	}

	$sql = "UPDATE $tb_name SET $sets WHERE id=$id";
	echo "$sql\n";

	return $db->query($sql);
}

/**
 * Extracts profile fields from the lines of a downloaded profile page.
 *
 * Recognised sections and the key each one fills:
 *  - a line containing ` id="pstatement" `  -> 'pstatement'
 *  - a line equal to "Medical School"       -> 'school'      (next line skipped)
 *  - a line equal to "1st Residency"        -> 'fresidency'  (next line skipped)
 *  - a line equal to "2nd Residency"        -> 'sresidency'  (next line skipped)
 *  - a line containing ` id="credentials" ` -> 'credentials' (tags stripped)
 *
 * A key is present in the result only when its section header was seen.
 *
 * @param array $arr Lines of the page, as returned by file().
 * @param mixed $id  Unused; kept so existing callers keep working.
 * @return array Field name => trimmed text.
 */
function get_data($arr, $id) {
	$data = array();
	if (!is_array($arr)) {
		// e.g. file() returned false for a missing page.
		return $data;
	}

	$n = count($arr);
	for ($i = 0; $i < $n; $i++) {
		$line = trim($arr[$i]);

		// After a block is read, $line holds the line that terminated it, and
		// the remaining checks in this iteration are applied to that line. A
		// residency block that ends on the credentials <div> therefore still
		// triggers the credentials section below.
		if (strpos($line, " id=\"pstatement\" ") !== false) {
			list($data['pstatement'], $i, $line) = get_data_read_block($arr, $i, $n, false, false);
		}

		if ($line == "Medical School") {
			list($data['school'], $i, $line) = get_data_read_block($arr, $i + 1, $n, false, false);
		}

		if ($line == "1st Residency") {
			list($data['fresidency'], $i, $line) = get_data_read_block($arr, $i + 1, $n, true, false);
		}

		if ($line == "2nd Residency") {
			list($data['sresidency'], $i, $line) = get_data_read_block($arr, $i + 1, $n, true, false);
		}

		if (strpos($line, " id=\"credentials\" ") !== false) {
			list($data['credentials'], $i, $line) = get_data_read_block($arr, $i, $n, false, true);
		}
	}

	return $data;
}

/**
 * Reads the lines after index $start until a terminator line or the end of
 * the input, whichever comes first. Never reads past the last element.
 *
 * @param array $arr              Page lines.
 * @param int   $start            Index of the line preceding the first line to read.
 * @param int   $n                Number of lines in $arr.
 * @param bool  $stop_on_div_open Also stop on a line starting with "<div ".
 * @param bool  $plain_text       true: strip tags, drop empty lines, one entry per line;
 *                                false: concatenate lines, turning "<br>" into newlines.
 * @return array array(trimmed text, index of the terminator line or $n at end
 *               of input, terminator line or "" at end of input)
 */
function get_data_read_block($arr, $start, $n, $stop_on_div_open, $plain_text) {
	$text = "";

	// Pre-increment keeps the index strictly below $n before $arr is touched.
	for ($i = $start + 1; $i < $n; $i++) {
		$line = trim($arr[$i]);

		if ($line == "</div>" || ($stop_on_div_open && substr($line, 0, 5) == "<div ")) {
			return array(trim($text), $i, $line);
		}

		if ($plain_text) {
			if ($line) {
				$text .= strip_tags($line) . "\n";
			}
		} else {
			$text .= str_replace("<br>", "\n", $line);
		}
	}

	// Ran off the end without a terminator.
	return array(trim($text), $n, "");
}
